/**
 * @license Copyright (c) 2003-2023, CKSource Holding sp. z o.o. All rights reserved.
 * For licensing, see LICENSE.md or https://ckeditor.com/legal/ckeditor-oss-license
 */
/**
 * @module paste-from-office/filters/image
 */
/* globals btoa */
import { Matcher, UpcastWriter } from 'ckeditor5/src/engine';
/**
 * Replaces source attribute of all `<img>` elements representing regular
 * images (not the Word shapes) with inlined base64 image representation extracted from RTF or Blob data.
 *
 * @param documentFragment Document fragment on which transform images.
 * @param rtfData The RTF data from which images representation will be used.
 */
export function replaceImagesSourceWithBase64(documentFragment, rtfData) {
    if (!documentFragment.childCount) {
        return;
    }
    const upcastWriter = new UpcastWriter(documentFragment.document);
    const shapesIds = findAllShapesIds(documentFragment, upcastWriter);
    removeAllImgElementsRepresentingShapes(shapesIds, documentFragment, upcastWriter);
    insertMissingImgs(shapesIds, documentFragment, upcastWriter);
    removeAllShapeElements(documentFragment, upcastWriter);
    const images = findAllImageElementsWithLocalSource(documentFragment, upcastWriter);
    if (images.length) {
        replaceImagesFileSourceWithInlineRepresentation(images, extractImageDataFromRtf(rtfData), upcastWriter);
    }
}
/**
 * Converts given HEX string to base64 representation.
 *
 * @internal
 * @param hexString The HEX string to be converted.
 * @returns Base64 representation of a given HEX string.
 */
export function _convertHexToBase64(hexString) {
    return btoa(hexString.match(/\w{2}/g).map(char => {
        return String.fromCharCode(parseInt(char, 16));
    }).join(''));
}
/**
 * Finds all shapes (`<v:*>...</v:*>`) ids. Shapes can represent images (canvas)
 * or Word shapes (which does not have RTF or Blob representation).
 *
 * @param documentFragment Document fragment from which to extract shape ids.
 * @returns Array of shape ids.
 */
function findAllShapesIds(documentFragment, writer) {
    const range = writer.createRangeIn(documentFragment);
    const shapeElementsMatcher = new Matcher({
        name: /v:(.+)/
    });
    const shapesIds = [];
    for (const value of range) {
        if (value.type != 'elementStart') {
            continue;
        }
        const el = value.item;
        const previousSibling = el.previousSibling;
        const prevSiblingName = previousSibling && previousSibling.is('element') ? previousSibling.name : null;
        // If shape element have 'o:gfxdata' attribute and is not directly before `<v:shapetype>` element it means it represent Word shape.
        if (shapeElementsMatcher.match(el) && el.getAttribute('o:gfxdata') && prevSiblingName !== 'v:shapetype') {
            shapesIds.push(value.item.getAttribute('id'));
        }
    }
    return shapesIds;
}
/**
 * Removes all `<img>` elements which represents Word shapes and not regular images.
 *
 * @param shapesIds Shape ids which will be checked against `<img>` elements.
 * @param documentFragment Document fragment from which to remove `<img>` elements.
 */
function removeAllImgElementsRepresentingShapes(shapesIds, documentFragment, writer) {
    const range = writer.createRangeIn(documentFragment);
    const imageElementsMatcher = new Matcher({
        name: 'img'
    });
    const imgs = [];
    for (const value of range) {
        if (value.item.is('element') && imageElementsMatcher.match(value.item)) {
            const el = value.item;
            const shapes = el.getAttribute('v:shapes') ? el.getAttribute('v:shapes').split(' ') : [];
            if (shapes.length && shapes.every(shape => shapesIds.indexOf(shape) > -1)) {
                imgs.push(el);
                // Shapes may also have empty source while content is paste in some browsers (Safari).
            }
            else if (!el.getAttribute('src')) {
                imgs.push(el);
            }
        }
    }
    for (const img of imgs) {
        writer.remove(img);
    }
}
/**
 * Removes all shape elements (`<v:*>...</v:*>`) so they do not pollute the output structure.
 *
 * @param documentFragment Document fragment from which to remove shape elements.
 */
function removeAllShapeElements(documentFragment, writer) {
    const range = writer.createRangeIn(documentFragment);
    const shapeElementsMatcher = new Matcher({
        name: /v:(.+)/
    });
    const shapes = [];
    for (const value of range) {
        if (value.type == 'elementStart' && shapeElementsMatcher.match(value.item)) {
            shapes.push(value.item);
        }
    }
    for (const shape of shapes) {
        writer.remove(shape);
    }
}
/**
 * Inserts `img` tags if there is none after a shape.
 */
function insertMissingImgs(shapeIds, documentFragment, writer) {
    const range = writer.createRangeIn(documentFragment);
    const shapes = [];
    for (const value of range) {
        if (value.type == 'elementStart' && value.item.is('element', 'v:shape')) {
            const id = value.item.getAttribute('id');
            if (shapeIds.includes(id)) {
                continue;
            }
            if (!containsMatchingImg(value.item.parent.getChildren(), id)) {
                shapes.push(value.item);
            }
        }
    }
    for (const shape of shapes) {
        const attrs = {
            src: findSrc(shape)
        };
        if (shape.hasAttribute('alt')) {
            attrs.alt = shape.getAttribute('alt');
        }
        const img = writer.createElement('img', attrs);
        writer.insertChild(shape.index + 1, img, shape.parent);
    }
    function containsMatchingImg(nodes, id) {
        for (const node of nodes) {
            /* istanbul ignore else -- @preserve */
            if (node.is('element')) {
                if (node.name == 'img' && node.getAttribute('v:shapes') == id) {
                    return true;
                }
                if (containsMatchingImg(node.getChildren(), id)) {
                    return true;
                }
            }
        }
        return false;
    }
    function findSrc(shape) {
        for (const child of shape.getChildren()) {
            /* istanbul ignore else -- @preserve */
            if (child.is('element') && child.getAttribute('src')) {
                return child.getAttribute('src');
            }
        }
    }
}
/**
 * Finds all `<img>` elements in a given document fragment which have source pointing to local `file://` resource.
 *
 * @param documentFragment Document fragment in which to look for `<img>` elements.
 * @returns result All found images grouped by source type.
 */
function findAllImageElementsWithLocalSource(documentFragment, writer) {
    const range = writer.createRangeIn(documentFragment);
    const imageElementsMatcher = new Matcher({
        name: 'img'
    });
    const imgs = [];
    for (const value of range) {
        if (value.item.is('element') && imageElementsMatcher.match(value.item)) {
            if (value.item.getAttribute('src').startsWith('file://')) {
                imgs.push(value.item);
            }
        }
    }
    return imgs;
}
/**
 * Extracts all images HEX representations from a given RTF data.
 *
 * @param rtfData The RTF data from which to extract images HEX representation.
 * @returns Array of found HEX representations. Each array item is an object containing:
 *
 * * hex Image representation in HEX format.
 * * type Type of image, `image/png` or `image/jpeg`.
 */
function extractImageDataFromRtf(rtfData) {
    if (!rtfData) {
        return [];
    }
    const regexPictureHeader = /{\\pict[\s\S]+?\\bliptag-?\d+(\\blipupi-?\d+)?({\\\*\\blipuid\s?[\da-fA-F]+)?[\s}]*?/;
    const regexPicture = new RegExp('(?:(' + regexPictureHeader.source + '))([\\da-fA-F\\s]+)\\}', 'g');
    const images = rtfData.match(regexPicture);
    const result = [];
    if (images) {
        for (const image of images) {
            let imageType = false;
            if (image.includes('\\pngblip')) {
                imageType = 'image/png';
            }
            else if (image.includes('\\jpegblip')) {
                imageType = 'image/jpeg';
            }
            if (imageType) {
                result.push({
                    hex: image.replace(regexPictureHeader, '').replace(/[^\da-fA-F]/g, ''),
                    type: imageType
                });
            }
        }
    }
    return result;
}
/**
 * Replaces `src` attribute value of all given images with the corresponding base64 image representation.
 *
 * @param imageElements Array of image elements which will have its source replaced.
 * @param imagesHexSources Array of images hex sources (usually the result of `extractImageDataFromRtf()` function).
 * The array should be the same length as `imageElements` parameter.
 */
function replaceImagesFileSourceWithInlineRepresentation(imageElements, imagesHexSources, writer) {
    // Assume there is an equal amount of image elements and images HEX sources so they can be matched accordingly based on existing order.
    if (imageElements.length === imagesHexSources.length) {
        for (let i = 0; i < imageElements.length; i++) {
            const newSrc = `data:${imagesHexSources[i].type};base64,${_convertHexToBase64(imagesHexSources[i].hex)}`;
            writer.setAttribute('src', newSrc, imageElements[i]);
        }
    }
}
